In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import plotly.offline as pyo
import plotly.graph_objects as go
from plotly.subplots import make_subplots
In [2]:
df = pd.read_json('hotel_reviews.json', orient='records', lines=True)

df_pos = df[df['sentiment'] == 'positive']

df_neg = df[df['sentiment'] == 'negative']

df_neu = df[df['sentiment'] == 'neutral']

Social Network Distribution

In [3]:
fig = go.Figure(data=[go.Pie(labels=sorted(df['social-network'].unique()),
                             values=df['social-network'].value_counts().sort_index().tolist(),
                             text=sorted(df['social-network'].unique()),
                             textposition='auto',
                             hoverinfo='label+percent')])

fig.update_layout(title={'text':'Social Network Distribution',
                           'y':0.9,
                           'x':0.5,
                           'xanchor':'center',
                           'yanchor':'top'},
                    width=900,
                    height=700,
                    font=dict(family="Courier New, monospace",
                            size=14,
                            color="#111211")
                    )


fig.show(renderer='notebook')

Sentiment Distribution by Social Network

In [4]:
fig = go.Figure(data=[
    go.Bar(name='Booking',
           x=sorted(df['sentiment'].unique().tolist()),
           y=df[df['social-network'] == 'booking']['sentiment'].value_counts().sort_index(),
           text=df[df['social-network'] == 'booking']['sentiment'].value_counts().sort_index(),
           textposition='auto'),
    
    go.Bar(name='Facebook',
           x=sorted(df['sentiment'].unique().tolist()),
           y=df[df['social-network'] == 'facebook']['sentiment'].value_counts().sort_index(),
           text=df[df['social-network'] == 'facebook']['sentiment'].value_counts().sort_index(),
           textposition='auto'),
    
    go.Bar(name='Instagram',
           x=sorted(df['sentiment'].unique().tolist()),
           y=df[df['social-network'] == 'instagram']['sentiment'].value_counts().sort_index(),
           text=df[df['social-network'] == 'instagram']['sentiment'].value_counts().sort_index(),
           textposition='auto'),

    
    go.Bar(name='TripAdvisor',
           x=sorted(df['sentiment'].unique().tolist()),
           y=df[df['social-network'] == 'tripadvisor']['sentiment'].value_counts().sort_index(),
           text=df[df['social-network'] == 'tripadvisor']['sentiment'].value_counts().sort_index(),
           textposition='auto'),

    go.Bar(name='Twitter',
           x=sorted(df['sentiment'].unique().tolist()),
           y=df[df['social-network'] == 'twitter']['sentiment'].value_counts().sort_index(),
           text=df[df['social-network'] == 'twitter']['sentiment'].value_counts().sort_index(),
           textposition='auto'),

    go.Bar(name='Yelp',
           x=sorted(df['sentiment'].unique().tolist()),
           y=df[df['social-network'] == 'yelp']['sentiment'].value_counts().sort_index(),
           text=df[df['social-network'] == 'yelp']['sentiment'].value_counts().sort_index(),
           textposition='auto'),
])


fig.update_layout(barmode='stack',
                  xaxis_title="Sentiments", 
                  yaxis_title="Counts",
                  title={'text':'Sentiment Distribution by Social Network',
                         'y':0.9,
                         'x':0.5,
                         'xanchor':'center',
                         'yanchor':'top'},
                  width=950,
                  height=700,
                  font=dict(family="Courier New, monospace",
                            size=14,
                            color="#111211")
                    )


# pyo.plot(fig)
fig.show(renderer="notebook")

Emotion Distribution

In [5]:
fig = make_subplots(rows=1,
                    cols=2,
                    specs=[[{'type':'domain'}, {'type':'domain'}]])

fig.add_trace(go.Pie(labels=sorted(df_pos['deep_sentiment'].unique().tolist()),
                     values=df_pos['deep_sentiment'].value_counts().sort_index(),
                     name='Positive',
                     text=sorted(df_pos['deep_sentiment'].unique().tolist()),
                     textposition='auto'), 1, 1)

fig.add_trace(go.Pie(labels=sorted(df_neg['deep_sentiment'].unique().tolist()),
                     values=df_neg['deep_sentiment'].value_counts().sort_index(),
                     name='Negative',
                     text=sorted(df_neg['deep_sentiment'].unique().tolist()),
                     textposition='auto'), 1, 2)

# Use `hole` to create a donut-like pie chart
fig.update_traces(hole=.5)


fig.update_layout(title={'text':'Emotion Distribution',
                         'y':0.9,
                         'x':0.5,
                         'xanchor':'center',
                         'yanchor':'top'},
                  width=900,
                  height=700,
                  font=dict(family="Courier New, monospace",
                            size=14,
                            color="#111211"),
                  annotations=[dict(text='Positive', x=0.15, y=0.5, font_size=20, showarrow=False),
                               dict(text='Negative', x=0.85, y=0.5, font_size=20, showarrow=False)]
                 )




fig.show(renderer='notebook')

Aspect Distribution

In [6]:
fig = go.Figure(data=[
    go.Bar(name='Aspect',
           x=sorted(df['aspect'].unique()),
           y=df['aspect'].value_counts().sort_index(),
           text=df['aspect'].value_counts().sort_index(),
           textposition='auto')
])


fig.update_layout(title={'text':'Aspect Distribution',
                           'y':0.9,
                           'x':0.5,
                           'xanchor':'center',
                           'yanchor':'top'},
                    width=900,
                    height=700,
                    font=dict(family="Courier New, monospace",
                            size=14,
                            color="#111211")
                    )


fig.show(renderer='notebook')

Sentiments by each Aspects

In [7]:
fig = go.Figure(data=[
    go.Bar(name='Positive',
           x=sorted(df['aspect'].unique().tolist()),
           y=pd.Series(df[df['sentiment'] == 'positive']['aspect'].value_counts()).sort_index().tolist(),
           text=pd.Series(df[df['sentiment'] == 'positive']['aspect'].value_counts()).sort_index().tolist(),
           textposition='auto'),
    
    go.Bar(name='Negative',
           x=sorted(df['aspect'].unique().tolist()),
           y=pd.Series(df[df['sentiment'] == 'negative']['aspect'].value_counts()).sort_index().tolist(),
           text=pd.Series(df[df['sentiment'] == 'negative']['aspect'].value_counts()).sort_index().tolist(),
           textposition='auto'),
    
    go.Bar(name='Neutral',
           x=sorted(df['aspect'].unique().tolist()),
           y=pd.Series(df[df['sentiment'] == 'neutral']['aspect'].value_counts()).sort_index().tolist(),
           text=pd.Series(df[df['sentiment'] == 'neutral']['aspect'].value_counts()).sort_index().tolist(),
           textposition='auto')
])


fig.update_layout(barmode='stack',
                  xaxis_title="Review Topics", 
                  yaxis_title="Counts",
                  title={'text':'Sentiment Distribution by Aspect',
                         'y':0.9,
                         'x':0.5,
                         'xanchor':'center',
                         'yanchor':'top'},
                  width=900,
                  height=700,
                  font=dict(family="Courier New, monospace",
                            size=14,
                            color="#111211")
                    )


# pyo.plot(fig)
fig.show(renderer="notebook")

Monthly Sentiment Distribution

In [8]:
df.loc[:, 'month_year'] = df['date'].dt.strftime('%B-%Y')

pos_count = df[df['sentiment'] == 'positive']['date'].value_counts().reset_index()

neg_count = df[df['sentiment'] == 'negative']['date'].value_counts().reset_index()

neu_count = df[df['sentiment'] == 'neutral']['date'].value_counts().reset_index()


date=df['date'].sort_values().dt.strftime('%B-%Y').unique().tolist()

fig = go.Figure(data=[
    go.Bar(name='Positive',
           x=date,
           y=pos_count.groupby(pos_count['index'].dt.to_period('M')).sum()['date'].tolist(),
           text=pos_count.groupby(pos_count['index'].dt.to_period('M')).sum()['date'].tolist(),
           textposition='auto'),
    
    go.Bar(name='Negative',
           x=date,
           y=neg_count.groupby(neg_count['index'].dt.to_period('M')).sum()['date'].tolist(),
           text=neg_count.groupby(neg_count['index'].dt.to_period('M')).sum()['date'].tolist(),
           textposition='auto'),
    
    go.Bar(name='Neutral',
           x=date,
           y=neu_count.groupby(neu_count['index'].dt.to_period('M')).sum()['date'].tolist(),
           text=neu_count.groupby(neu_count['index'].dt.to_period('M')).sum()['date'].tolist(),
           textposition='auto'),
    ])


fig.update_layout(barmode='stack', 
                  yaxis_title="Counts",
                  title={'text':'Monthly Sentiment Distribution',
                         'y':0.9,
                         'x':0.5,
                         'xanchor':'center',
                         'yanchor':'top'},
                  width=950,
                  height=700,
                  font=dict(family="Courier New, monospace",
                            size=14,
                            color="#111211")
                    )


# Add slider
fig.update_xaxes(rangeslider_visible=True)


# pyo.plot(fig)
fig.show(renderer="notebook")

Percentage Change of Sentiments

In [9]:
fig = go.Figure()

fig.add_trace(go.Scatter(x=df['date'].sort_values().dt.strftime('%B-%Y').unique().tolist(),
                         y=(pos_count.groupby(pos_count['index'].dt.to_period('M')).sum()['date'].pct_change()*100).tolist(),
                    mode='lines+markers',
                    name='Positive'))

fig.add_trace(go.Scatter(x=df['date'].sort_values().dt.strftime('%B-%Y').unique().tolist(),
                         y=(neg_count.groupby(neg_count['index'].dt.to_period('M')).sum()['date'].pct_change()*100).tolist(),
                    mode='lines+markers',
                    name='Negative'))

fig.add_trace(go.Scatter(x=df['date'].sort_values().dt.strftime('%B-%Y').unique().tolist(),
                         y=(neu_count.groupby(neg_count['index'].dt.to_period('M')).sum()['date'].pct_change()*100).tolist(),
                    mode='lines+markers',
                    name='Neutral'))


fig.update_layout(yaxis_title="%",
                  title={'text':'Percentage Change of Sentiments',
                         'y':0.9,
                         'x':0.45,
                         'xanchor':'center',
                         'yanchor':'top'},
                  width=900,
                  height=600,
                  font=dict(family="Courier New, monospace",
                            size=14,
                            color="#111211")
                    )

Monthly Aspect Distribution

In [10]:
cleanliness_count = df[df['aspect'] == 'Cleanliness']['date'].value_counts().reset_index()

service_count = df[df['aspect'] == 'Staff/service']['date'].value_counts().reset_index()

location_count = df[df['aspect'] == 'Location']['date'].value_counts().reset_index()

facilities_count = df[df['aspect'] == 'Facilities']['date'].value_counts().reset_index()



date=df['date'].sort_values().dt.strftime('%B-%Y').unique().tolist()


fig = go.Figure(data=[
    go.Bar(name='Cleanliness',
           x=date,
           y=cleanliness_count.groupby(cleanliness_count['index'].dt.to_period('M')).sum()['date'].tolist(),
           text=cleanliness_count.groupby(cleanliness_count['index'].dt.to_period('M')).sum()['date'].tolist(),
           textposition='auto'),
    
    go.Bar(name='Staff/service',
           x=date,
           y=service_count.groupby(service_count['index'].dt.to_period('M')).sum()['date'].tolist(),
           text=service_count.groupby(service_count['index'].dt.to_period('M')).sum()['date'].tolist(),
           textposition='auto'),
    
    go.Bar(name='Location',
           x=date,
           y=location_count.groupby(location_count['index'].dt.to_period('M')).sum()['date'].tolist(),
           text=location_count.groupby(location_count['index'].dt.to_period('M')).sum()['date'].tolist(),
           textposition='auto'),

    go.Bar(name='Facilities',
           x=date,
           y=facilities_count.groupby(facilities_count['index'].dt.to_period('M')).sum()['date'].tolist(),
           text=facilities_count.groupby(facilities_count['index'].dt.to_period('M')).sum()['date'].tolist(),
           textposition='auto')
])


fig.update_layout(barmode='stack', 
                  yaxis_title="Counts",
                  title={'text':'Monthly Aspect Distribution',
                         'y':0.9,
                         'x':0.5,
                         'xanchor':'center',
                         'yanchor':'top'},
                  width=950,
                  height=700,
                  font=dict(family="Courier New, monospace",
                            size=14,
                            color="#111211")
                    )


# Add slaider
fig.update_xaxes(rangeslider_visible=True)

# pyo.plot(fig)
fig.show(renderer="notebook")

Percentage Change of Aspects

In [11]:
fig = go.Figure()

fig.add_trace(go.Scatter(x=df['date'].sort_values().dt.strftime('%B-%Y').unique().tolist(),
                         y=(cleanliness_count.groupby(cleanliness_count['index'].dt.to_period('M')).sum()['date'].pct_change()*100).tolist(),
                    mode='lines+markers',
                    name='Cleanliness'))

fig.add_trace(go.Scatter(x=df['date'].sort_values().dt.strftime('%B-%Y').unique().tolist(),
                         y=(service_count.groupby(service_count['index'].dt.to_period('M')).sum()['date'].pct_change()*100).tolist(),
                    mode='lines+markers',
                    name='Staff/service'))

fig.add_trace(go.Scatter(x=df['date'].sort_values().dt.strftime('%B-%Y').unique().tolist(),
                         y=(location_count.groupby(location_count['index'].dt.to_period('M')).sum()['date'].pct_change()*100).tolist(),
                    mode='lines+markers',
                    name='Location'))

fig.add_trace(go.Scatter(x=df['date'].sort_values().dt.strftime('%B-%Y').unique().tolist(),
                         y=(facilities_count.groupby(facilities_count['index'].dt.to_period('M')).sum()['date'].pct_change()*100).tolist(),
                    mode='lines+markers',
                    name='Facilities'))


fig.update_layout(yaxis_title="%",
                  title={'text':'Percentage Change of Aspects',
                         'y':0.9,
                         'x':0.45,
                         'xanchor':'center',
                         'yanchor':'top'},
                  width=900,
                  height=600,
                  font=dict(family="Courier New, monospace",
                            size=14,
                            color="#111211")
                    )




fig.show(renderer='notebook')

Adjective Distribution by Sentiment

In [12]:
fig = make_subplots(rows=1, cols=2)

fig.add_trace(go.Bar(x=sorted(df_pos['adjective'].unique().tolist()),
                     y=df_pos['adjective'].value_counts().sort_index(),
                     name='Positive',
                     text=df_pos['adjective'].value_counts().sort_index(),
                     textposition='auto'), 1, 1)

fig.add_trace(go.Bar(x=sorted(df_neg['adjective'].unique().tolist()),
                     y=df_neg['adjective'].value_counts().sort_index(),
                     name='Negative',
                     text=df_neg['adjective'].value_counts().sort_index(),
                     textposition='auto'), 1, 2)


fig.update_layout(title={'text':'Adjective Distribution by Sentiment',
                           'y':0.9,
                           'x':0.5,
                           'xanchor':'center',
                           'yanchor':'top'},
                    width=950,
                    height=700,
                    font=dict(family="Courier New, monospace",
                            size=14,
                            color="#111211")
                    )




fig.show(renderer='notebook')

Review Count Change

In [13]:
fig = go.Figure(go.Waterfall(
    name = "20",
    orientation = "v",
    x = df['date'].sort_values().dt.strftime('%B-%Y').unique().tolist()[1:],
    y = df.groupby(df['date'].dt.to_period('M')).count()['sentiment'].diff().sort_index().tolist()[1:],
    text = df.groupby(df['date'].dt.to_period('M')).count()['sentiment'].diff().sort_index().tolist()[1:],
    base = 2691,
    textposition = "outside",
))


fig.update_layout(title={'text':'Review Count Change',
                           'y':0.9,
                           'x':0.5,
                           'xanchor':'center',
                           'yanchor':'top'},
                    width=950,
                    height=700,
                    font=dict(family="Courier New, monospace",
                            size=14,
                            color="#111211"),
                    waterfallgroupgap = 0.1,
                    yaxis_title="Review Count"
                    )




fig.show(renderer='notebook')
In [ ]: